---
title: "Replication Material"
subtitle: "Facts Shape Feelings: An Information Based Framework for Emotional Responses to Violence"
author: "Aidan Milliff"
date: "19 Feb 2020"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = F, message = F, comment = "")
```

```{r load packages}
if (!require('stm')) install.packages('stm'); library('stm')
if (!require('ggplot2')) install.packages('ggplot2'); library('ggplot2')
if (!require('ggthemes')) install.packages('ggthemes'); library('ggthemes')
if (!require('ggpubr')) install.packages('ggpubr'); library('ggpubr')
if (!require('dplyr')) install.packages('dplyr'); library('dplyr')
if (!require('igraph')) install.packages('igraph'); library('igraph')
if (!require('ggraph')) install.packages('ggraph'); library('ggraph')
```

```{r load data}
load("stm_fit.RData")        # fit_graf
load("processed_text.RData") # paras
load("stm_covars.RData")     # covars
load("rep_data.RData")       # df
```

This file replicates the analysis and figures in "Facts Shape Feelings: An Information Based Framework for Emotional Responses to Violence."

Data and model objects from the replication material should be placed in the same directory as this file in order to compile correctly. 

# Topic Models

```{r estimate effect}
#### Covariate/Topic Association Estimations ####
# (used below)
covars    <- left_join(covars, df, by = c("resp" = "ID"))
covars[is.na(covars)] <- 0 

known     <- estimateEffect(1:10 ~ Perp.known, fit_graf, metadata = covars)
prosec    <- estimateEffect(1:10 ~ Prosecuted, fit_graf, metadata = covars) 
blameperp <- estimateEffect(1:10 ~ Blame.perp, fit_graf, metadata = covars) 
blamenone <- estimateEffect(1:10 ~ Blame.noone, fit_graf, metadata = covars)
church    <- estimateEffect(1:10 ~ churchweekly, fit_graf, metadata = covars)
anger     <- estimateEffect(1:10 ~ P1.Angry.x, fit_graf, metadata = covars)
male      <- estimateEffect(1:10 ~ male, fit_graf, metadata = covars)
```

```{r topic corr}
#### Topic Correlation Plot ####

cor     <- topicCorr(fit_graf, method = "huge")
cor_mat <- cor$cor

colnames(cor_mat) <- c("1: Circumstances",
                       "2: Frust. Detectives",
                       "3: Reason/Motive",
                       "4: Finding Out",
                       "5: Anger/Blame",
                       "6: Motive confusion",
                       "7: Frust. Courts",
                       "8: What Ifs",
                       "9: Support/Community",
                       "10: Panic/Anxiety")
rownames(cor_mat) <-  c("1: Circumstances",
                        "2: Frust. Detectives",
                        "3: Reason/Motive",
                        "4: Finding Out",
                        "5: Anger/Blame",
                        "6: Motive confusion",
                        "7: Frust. Courts",
                        "8: What Ifs",
                        "9: Support/Community",
                        "10: Panic/Anxiety")

expand.grid.unique <- function(x, y, include.equals=FALSE)
{
  x <- unique(x)
  
  y <- unique(y)
  
  g <- function(i)
  {
    z <- setdiff(y, x[seq_len(i-include.equals)])
    
    if(length(z)) cbind(x[i], z, deparse.level=0)
  }
  
  do.call(rbind, lapply(seq_along(x), g))
}

vec <- as.data.frame(expand.grid.unique(colnames(cor_mat), rownames(cor_mat)))
vec$cor <- c(cor_mat[1,2:10],
             cor_mat[2,3:10],
             cor_mat[3,4:10],
             cor_mat[4,5:10],
             cor_mat[5,6:10],
             cor_mat[6,7:10],
             cor_mat[7,8:10],
             cor_mat[8,9:10],
             cor_mat[9,10])
colnames(vec) <- c("x", "y", "r")

vec_sm <- vec %>% filter(x %in% c("3: Reason/Motive", "4: Finding Out", "5: Anger/Blame", "6: Motive confusion") &
                           y %in% c("3: Reason/Motive", "4: Finding Out", "5: Anger/Blame", "6: Motive confusion"))

graph_cors <-  vec_sm %>% 
                    graph_from_data_frame(directed = F)

ggraph(graph_cors) +
  geom_edge_link(aes(color = r,
                     edge_width = abs(r),
                     edge_alpha = abs(r))) +
  geom_node_point(color = "white", size = 5) +
  geom_node_text(aes(label = name), repel = T, vjust = 1) +
  guides(edge_alpha = "none",
         edge_width = "none") +
  scale_edge_colour_gradient2(low = gray.colors(3)[3],
                              mid = gray.colors(3)[2],
                              high = gray.colors(3)[1]) +
  theme_minimal() +
  labs(title = "Topic Correlation Plot")
```

```{r effects plots, results='hide'}
#### New Effects Table ####

# use "plot" to extract predicted probabilities
# this gives a convenient set of values like expected value of a topic proportion based on a given covariate level
# but also some messy plots, so results are hidden

known_plot     <- plot(known, method = "difference", covariate = "Perp.known",
                       cov.value1 = 1, cov.value2 = 0, topics = c(2:10))
prosec_plot    <- plot(prosec, method = "difference", covariate = "Prosecuted",
                       cov.value1 = 1, cov.value2 = 0, topics = c(2:10))
church_plot    <- plot(church, method = "difference", covariate = "churchweekly",
                       cov.value1 = 1, cov.value2 = 0, topics = c(2:10))
anger_plot     <- plot(anger, method = "difference", covariate = "P1.Angry.x",
                       cov.value1 = 5, cov.value2 = 2, topics = c(2:10))
male_plot      <- plot(male, method = "difference",
                       covariate = "male", cov.value1 = 1, cov.value2 = 0, topics = c(2:10))
```

```{r figure A3, fig.cap="Appendix Figure A3"}
# next, extract predictor, topic, predicted probabilities at different values from the "plot" object

# This chunk creates the appendix figure

known_mat   <- cbind(known_plot$topics, unlist(known_plot$means), unlist(known_plot$cis)[c(1,3,5,7,9,11,13,15,17)], unlist(known_plot$cis)[c(2,4,6,8,10,12,14,16,18)])
prosec_mat  <- cbind(prosec_plot$topics, unlist(prosec_plot$means), unlist(prosec_plot$cis)[c(1,3,5,7,9,11,13,15,17)], unlist(prosec_plot$cis)[c(2,4,6,8,10,12,14,16,18)])
church_mat  <- cbind(church_plot$topics, unlist(church_plot$means), unlist(church_plot$cis)[c(1,3,5,7,9,11,13,15,17)], unlist(church_plot$cis)[c(2,4,6,8,10,12,14,16,18)])
anger_mat   <- cbind(anger_plot$topics, unlist(anger_plot$means), unlist(anger_plot$cis)[c(1,3,5,7,9,11,13,15,17)], unlist(anger_plot$cis)[c(2,4,6,8,10,12,14,16,18)])
male_mat    <- cbind(male_plot$topics, unlist(male_plot$means), unlist(male_plot$cis)[c(1,3,5,7,9,11,13,15,17)], unlist(male_plot$cis)[c(2,4,6,8,10,12,14,16,18)])
pp_table <- rbind.data.frame(known_mat, prosec_mat, church_mat, anger_mat, male_mat)
colnames(pp_table) <- c("Topic", "Mean", "ci_lo", "ci_hi")
pp_table$predictor <- c(rep("Perpetrator Known?", times = 9),
                        rep("Perpetrator Prosecuted?", times = 9),
                        rep("Weekly Church Attender?", times = 9),
                        rep("Self-Reported Anger (1-5)", times = 9),
                        rep("Male?", times = 9))
pp_table$Topic <- as.factor(pp_table$Topic)
levels(pp_table$Topic) <- c("2: Frust. Detectives",
                            "3: Reason/Motive",
                            "4: Finding Out",
                            "5: Anger/Blame",
                            "6: Motive confusion",
                            "7: Frust. Courts",
                            "8: What Ifs",
                            "9: Support/Cmty.",
                            "10: Panic/Anxiety")
pp_table$sig <- ifelse(pp_table$ci_lo < 0 & 0 < pp_table$ci_hi, "No", "Yes")
pp_table$sig <- as.factor(pp_table$sig)



out <- ggplot(pp_table) +  facet_wrap(~Topic) + geom_point(aes(y = Mean, x = predictor, color = sig, shape = sig)) + 
                    geom_pointrange(aes(ymin = ci_lo, ymax = ci_hi, color = sig, x = predictor, y = Mean, shape = sig)) + geom_hline(yintercept = 0, color = "grey") +
                    theme_minimal() + scale_color_grey()  + coord_flip(expand = T) + scale_x_discrete(expand = c(0,2)) + 
                    labs(title = "Predicted Difference in Topic Proportions",
                         subtitle = "Change in topic proportions associated with five covariates",
                         y = "Difference in Topic Proportion",
                         x = "Predictor",
                         shape  = "p < 0.05",
                         color = "p < 0.05") +
                          scale_y_continuous(breaks = c(-0.025, 0, 0.025), labels = c(-0.025, 0, 0.025))
out # Figure A3
```

```{r figure 4, fig.cap="Figure 4"}

# This chunk creates figure 4 from the paper

pp_restrict <- pp_table %>% filter(predictor %in% c("Perpetrator Known?", "Self-Reported Anger (1-5)", "Perpetrator Prosecuted?") &
                                     Topic %in% c("3: Reason/Motive", "5: Anger/Blame", "6: Motive confusion",
                                                  "4: Finding Out", "10: Panic/Anxiety", "2: Frust. Detectives"))

out_r <- ggplot(pp_restrict) +  facet_wrap(~Topic) + geom_point(aes(y = Mean, x = predictor, color = sig, shape = sig)) + 
  geom_pointrange(aes(ymin = ci_lo, ymax = ci_hi, color = sig, x = predictor, y = Mean, shape = sig)) + geom_hline(yintercept = 0, color = "grey") +
  theme_minimal() + scale_color_grey()  + 
  coord_flip(expand = T) +
  # scale_x_discrete(expand = c(0,4)) +
  scale_y_continuous(breaks = c(-0.025, 0, 0.025), labels = c("-0.025", "0", "0.025")) +
  theme(axis.text.x = element_text(angle = 60)) +
  labs(title = "Predicted Difference in Topic Proportions",
       subtitle = "Change in topic proportions associated with three covariates",
       y = "Difference in Topic Proportion",
       x = "Predictor",
       shape  = "p < 0.05",
       color = "p < 0.05")
out_r # Figure 4
```

```{r topic quality, fig.cap="Appendix Figure C1"}
# STM built in function to create topic quality plot
topicQuality(fit_graf, paras$documents)
```

```{r top stems}
labelTopics(fit_graf)
```



# Distribution Plots

```{r dist plots}
# Individual Plots that make up Figures 2, A1, and A2

perpknow <- ggplot(data = df,
                   aes(x = P1.Angry,
                       group = Perp.known,
                       fill = factor(Perp.known))) +
            geom_bar(position=position_dodge(preserve = "single"),
                     width=.5) + 
            labs(title = "Knowing the Identity of the Perpetrator",
                 x = "Anger Score",
                 y = "Count",
                 fill = "Identity\nknown") +
            scale_fill_grey(labels = c("No", "Yes")) +
            theme_minimal() + ylim(c(0,16))

perpblame <- ggplot(data = df,
                    aes(x = P1.Angry,
                        group = Blame.perp,
                        fill = factor(Blame.perp))) +
             geom_bar(position=position_dodge(preserve = "single"),
                      width=.5) + 
             labs(title = "Blaming the Perpetrator",
                  y = "Count",
                  x = "Anger Score",
                  fill = "Blame\nPerp.") +
             scale_fill_grey(labels = c("No", "Yes")) + 
             theme_minimal() + ylim(c(0,16))

friendblame <- ggplot(data = df,
                      aes(x = P1.Angry,
                          group = Blame.friends,
                          fill = factor(Blame.friends))) +
               geom_bar(position=position_dodge(preserve = "single"),
                        width=.5) + 
               labs(title = "Blaming Friends of Victim",
                    y = "Count",
                    x = "Anger Score",
                    fill = "Blame\nFriends") +
               scale_fill_grey(labels = c("No", "Yes")) +
               theme_minimal() + ylim(c(0,16))

vicblame <- ggplot(data = df,
                   aes(x = P1.Angry,
                       group = Blame.deceased,
                       fill = factor(Blame.deceased))) +
            geom_bar(position=position_dodge(preserve = "single"),
                     width=.5) + 
            labs(title = "Blaming Victim",
                 x = "Anger Score",
                 y = "Count",
                 fill = "Blame\nVictim") +
            scale_fill_grey(labels = c("No", "Yes")) + 
            theme_minimal() + ylim(c(0,16))

noblame <- ggplot(data = df,
                  aes(x = P1.Angry,
                      group = Blame.noone,
                      fill = factor(Blame.noone))) +
           geom_bar(position=position_dodge(preserve = "single"),
                    width=.5) + 
           labs(title = "Blaming Nobody",
                x = "Anger Score",
                y = "Count",
                fill = "Blame\nNobody") +
           scale_fill_grey(labels = c("No", "Yes")) +
           theme_minimal() + ylim(c(0,16))

asarep <- ggplot(data = subset(df, !is.na(Asa.rep)),
                 aes(x = P1.Angry,
                     group = Asa.rep,
                     fill = factor(Asa.rep))) +
          geom_bar(position=position_dodge(preserve = "single"),
                   width=.5) + 
          labs(title = "Experience of Justice System",
               x = "Anger Score",
               y = "Count",
               fill = "Prosec.\nGood?") +
          scale_fill_grey(labels = c("No", "Yes")) + 
          theme_minimal() + ylim(c(0,20))

detgood <- ggplot(data = df,
                  aes(x = P1.Angry,
                      group = det.good,
                      fill = factor(det.good))) +
           geom_bar(position=position_dodge(preserve = "single"),
                    width=.5) + 
           labs(title = "Experience of Justice System",
                x = "Anger Score",
                y = "Count",
                fill = "Detec.\nGood?") +
           scale_fill_grey(labels = c("No", "Yes")) + 
           theme_minimal() + ylim(c(0,16))

punishv <- ggplot(data = df,
                  aes(x = P1.Angry,
                      group = Punish.viol,
                      fill = factor(Punish.viol))) +
           geom_bar(position=position_dodge(preserve = "single"),
                    width=.5) + 
           labs(title = "Punishment Preferences",
                x = "Anger Score",
                y = "Count",
                fill = "Prefer\nViolence") +
           scale_fill_grey(labels = c("No", "Yes")) + 
           theme_minimal() + ylim(c(0,16))

punishj <- ggplot(data = df,
                  aes(x = P1.Angry,
                      group = Punish.jail,
                      fill = factor(Punish.jail))) +
           geom_bar(position=position_dodge(preserve = "single"),
                    width=.5) + 
           labs(title = "Punishment Preferences",
                x = "Anger Score",
                y = "Count",
                fill = "Prefer\nJail") +
           scale_fill_grey(labels = c("No", "Yes")) + 
           theme_minimal() + ylim(c(0,16))

angertime <- ggplot(data = subset(df),
                    aes(x = Timesince,
                        y = P1.Angry)) +
             geom_density_2d(color = "darkgrey") +
             theme_minimal() +
             geom_point(alpha = 0.4, color = "black") +
             labs(title = "Anger and Time since Homicide",
                  x = "Time since homicide (weeks)",
                  y = "Self Reported Anger (out of five)")

# Combined Plots

allplot1 <- ggarrange(perpknow, 
                      perpblame,
                      friendblame, 
                      noblame,
                      common.legend = F)

allplot2 <- ggarrange(detgood,
                      punishv,
                      asarep,
                      punishj)

angerplot <- ggarrange(angertime,
                       perpknow,
                       detgood,
                       asarep,
                       common.legend = F)
```

```{r allplot1, echo = F, fig.cap="Allplot 1 (Figure 2 in Paper)"}
allplot1
```

```{r allplot2, echo = F, fig.cap="Allplot 2 (Figure A2 in Paper"}
allplot2
```

```{r angerplot, echo = F, fig.cap="Anger Combined Plot (Figure A1 in Paper)"}
angerplot
```

# Online Appendix D

```{r PANAS}
# PANAS t-tests comparing sample data to population averages for PANAS

t.test2 <- function(m1,m2,s1,s2,n1,n2,m0=0,equal.variance=FALSE)
{
    if( equal.variance==FALSE ) 
    {
        se <- sqrt( (s1^2/n1) + (s2^2/n2) )
        # welch-satterthwaite df
        df <- ( (s1^2/n1 + s2^2/n2)^2 )/( (s1^2/n1)^2/(n1-1) + (s2^2/n2)^2/(n2-1) )
    } else
    {
        # pooled standard deviation, scaled by the sample sizes
        se <- sqrt( (1/n1 + 1/n2) * ((n1-1)*s1^2 + (n2-1)*s2^2)/(n1+n2-2) ) 
        df <- n1+n2-2
    }      
    t <- (m1-m2-m0)/se 
    dat <- c(m1-m2, se, t, 2*pt(-abs(t),df))    
    names(dat) <- c("Difference of means", "Std Error", "t", "p-value")
    return(dat) 
}

P1neg <- t.test2(mean(df$P1.Negative.Affect), 17.4, sd(df$P1.Negative.Affect), 6.2, 31, 100)
P1pos <- t.test2(mean(df$P1.Positive.Affect), 33.3, sd(df$P1.Positive.Affect), 7.2, 31, 100)
P2neg <- t.test2(mean(df$P2.Negative.Affect), 17.4, sd(df$P2.Negative.Affect), 6.2, 31, 100)
P2pos <- t.test2(mean(df$P2.Positive.Affect), 33.3, sd(df$P2.Positive.Affect), 7.2, 31, 100)

P12pos   <- t.test2(mean(df$P1.Positive.Affect), mean(df$P2.Positive.Affect), sd(df$P1.Positive.Affect), sd(df$P2.Positive.Affect), 31, 31)
P12neg   <- t.test2(mean(df$P1.Negative.Affect), mean(df$P2.Negative.Affect), sd(df$P1.Negative.Affect), sd(df$P2.Negative.Affect), 31, 31)

PANAS <- rbind.data.frame(P1neg, P1pos, P2neg, P2pos, P12neg, P12pos)

PANAS <- cbind(c("Present Day", "Present Day", "Post-Homicide", "Post-Homicide", "Post Homicide to Present Day", "Post Homicide to Present Day"),
                          c("Negative Affect", "Positive Affect", "Negative Affect", "Positive Affect", "Negative Affect", "Positive Affect"),
                          PANAS)
colnames(PANAS) <- c("Question", "Valence", "Difference in Means", "Std. Error", "T-score", "P-value")

knitr::kable(PANAS)
```

# Emotion Correlations
```{r}
summary(df$P1.Fear.Index)
summary(df$P1.Anger.Index)
cor(df$P1.Anger.Index, df$P1.Fear.Index, use = 'pairwise.complete.obs')
summary(df$)
```

